---
title: "Preparing the Dependent Variable"
output:
  html_document:
    df_print: paged
---
General Note
------------

The 'Observatorio de Memoria y Conflicto' dataset of the CNMH (2023) consists of 11 pairs of datasets disaggregated by types of conflict events, and by events (cases) and victims of such events. In total, there are 22 individual datasets.

OMC only provides information on the perpetrators of violence in the datasets of cases. Thus, I process each pair of datasets (cases and victims) to determine the number of victims of massacres and assassinations by rebel groups. Later, I add up these victims and prepare the aggregated dataset.

The first step is to clean the datasets on events.

Updated Version:

```{r}
library(tidyverse)
library(readstata13)
library(foreign)
library(readxl)
library(writexl)

# Case-level (event) data. Per the general note, perpetrator information is
# only available at the case level, so it is merged onto the victims below.
# See the folder 'CV' for more information on how this dataset was prepared.
omc_cco <- read_csv("Ortega(2024)_OMC-CasosFinal_CivilianTarget-Updated(09132024).csv")

# Victim-level records of assassinations. The Spanish OMC column names are
# mapped to the short English names used in the rest of the script
# (new name = original name).
omc_v_as <- read_xlsx("VictimasAS_202309.xlsx") %>%
  dplyr::rename(all_of(c(
    year = "Año",
    month = "Mes",
    day = "Día",
    divipola_alt = "Código DANE de Municipio",
    mun_uc = "Municipio",
    dep_uc = "Departamento",
    id_event = "ID Caso",
    id_victim = "ID Persona",
    victim = "Calidad de la Víctima o la Baja",
    pol_m = "Militante Político",
    sex = "Sexo",
    vuln_pop = "Tipo de Población Vulnerable",
    eth_g = "Etnia",
    age = "Edad",
    combat1 = "Fuerza o Grupo Armado Organizado al que Pertenece el Combatiente",
    combat2 = "Descripción Fuerza o Grupo Armado Organizado al que Pertenece el Combatiente",
    job = "Ocupación"
  ))) %>%
  # Keep civilian victims only.
  filter(victim == "CIVIL") %>%
  # Numeric versions of the event id (join key) and of the municipality code.
  mutate(
    id_event = as.numeric(id_event),
    divipola = as.numeric(divipola_alt)
  )

# Attach the case-level variables to every victim, then keep the victims whose
# event is flagged as an assassination (victims without a matching case have a
# missing flag and are dropped by the filter).
omc_ve_as <- omc_v_as %>%
  left_join(omc_cco, by = "id_event") %>%
  filter(assassination == 1)

# To control for selectivity
# A killing is coded as selective (`sv == 1`) when the victim is recorded as a
# political militant or when the modality of the event is one of the seven
# modalities treated as selective; otherwise `sv == 0`.

# Victims without a recorded political militancy get the explicit label "None".
omc_ve_as <- omc_ve_as %>%
  mutate(pol_m = replace_na(pol_m, "None"))

# Frequency table of the modalities, printed for inspection only.
omc_ve_as %>%
  group_by(Modalidad) %>%
  count()

omc_ve_as <- omc_ve_as %>%
  mutate(
    # 1 when `Modalidad` contains any of the selective modalities; 0 otherwise,
    # including when `Modalidad` is missing.
    modality_sv = if_else(
      str_detect(
        Modalidad,
        str_c(
          c("ASALTO", "ATENTADO", "CITACIÓN", "ENGAÑO",
            "RETENCIÓN/EJECUCIÓN", "RUTA", "SICARIATO"),
          collapse = "|"
        )
      ),
      1, 0,
      missing = 0
    ),
    # Neither input can be missing at this point, so a two-way split is enough.
    sv = if_else(pol_m != "None" | modality_sv == 1, 1, 0),
    # `year.x` is the victim-file year (the join suffixed the duplicated name).
    year = as.numeric(year.x)
  )

# Now, only violence by RG
omc_ve_as_r <- omc_ve_as %>%
  filter(rebel_groups == 1)

# Number of victims per municipality-year: all killings, then the selective
# subset. `count()` returns an ungrouped tibble, so no `divipola` grouping
# leaks into the joined table further down.
kciv_r <- omc_ve_as_r %>%
  count(divipola, year, name = "reb_ckill")

kciv_r_sv <- omc_ve_as_r %>%
  filter(sv == 1) %>%
  count(divipola, year, name = "reb_ckill_sv")

# Now, only violence by FARC
# Events attributed jointly to FARC and another group, or to the Coordinadora
# Guerrillera Simón Bolívar, count as FARC events too. A missing group name
# yields 0.
omc_ve_as_r <- omc_ve_as_r %>%
  mutate(farc = if_else(
    GrupoArmado1_name %in% c(
      "FARC", "FARC/ELN", "FARC/EPL",
      "COORDINADORA GUERRILLERA SIMÓN BOLÍVAR"
    ),
    1, 0
  ))

omc_ve_as_farc <- omc_ve_as_r %>%
  filter(farc == 1)

kciv_farc <- omc_ve_as_farc %>%
  count(divipola, year, name = "farc_ckill")

kciv_farc_sv <- omc_ve_as_farc %>%
  filter(sv == 1) %>%
  count(divipola, year, name = "farc_ckill_sv")

# Now, only violence by ELN
# Same rule as for FARC: joint events and the Coordinadora count as ELN events.
omc_ve_as_r <- omc_ve_as_r %>%
  mutate(eln = if_else(
    GrupoArmado1_name %in% c(
      "ELN", "FARC/ELN", "ELN/EPL", "ELN/ERP",
      "COORDINADORA GUERRILLERA SIMÓN BOLÍVAR"
    ),
    1, 0
  ))

omc_ve_as_eln <- omc_ve_as_r %>%
  filter(eln == 1)

kciv_eln <- omc_ve_as_eln %>%
  count(divipola, year, name = "eln_ckill")

kciv_eln_sv <- omc_ve_as_eln %>%
  filter(sv == 1) %>%
  count(divipola, year, name = "eln_ckill_sv")

# One row per municipality-year with any rebel killing; the subset counts are
# merged on the explicit keys and a missing count means zero victims.
# NOTE(review): `replace(is.na(.), 0)` covers the whole table, so a missing
# `divipola` or `year` key is also turned into 0 - confirm this is intended.
kciv_rg_t <- kciv_r %>%
  left_join(kciv_r_sv, by = c("divipola", "year")) %>%
  left_join(kciv_farc, by = c("divipola", "year")) %>%
  left_join(kciv_farc_sv, by = c("divipola", "year")) %>%
  left_join(kciv_eln, by = c("divipola", "year")) %>%
  left_join(kciv_eln_sv, by = c("divipola", "year")) %>%
  replace(is.na(.), 0)

write_xlsx(kciv_rg_t, "Ortega(2024)-PreProcessingOMC_VictimsR_AS-Updated(09162024).xlsx")
  # Checked -> M
  
# Now massacres

# Free the intermediate objects of the assassination step. Only objects this
# script created are removed (and only if they exist), so anything else in the
# environment is left untouched; `omc_cco` and `kciv_rg_t` are still needed.
rm(list = intersect(ls(), c(
  "omc_v_as", "omc_ve_as", "omc_ve_as_r", "omc_ve_as_farc", "omc_ve_as_eln",
  "kciv_r", "kciv_r_sv", "kciv_farc", "kciv_farc_sv", "kciv_eln", "kciv_eln_sv"
)))

# Victim-level records of massacres. The Spanish OMC column names are mapped to
# the short English names used in the rest of the script
# (new name = original name).
omc_v_ma <- read_xlsx("VictimasMA_202309.xlsx") %>%
  dplyr::rename(all_of(c(
    year = "Año",
    month = "Mes",
    day = "Día",
    divipola_alt = "Código DANE de Municipio",
    mun_uc = "Municipio",
    dep_uc = "Departamento",
    id_event = "ID Caso",
    id_victim = "ID Persona",
    victim = "Calidad de la Víctima o la Baja",
    pol_m = "Militante Político",
    sex = "Sexo",
    vuln_pop = "Tipo de Población Vulnerable",
    eth_g = "Etnia",
    age = "Edad",
    combat1 = "Fuerza o Grupo Armado Organizado al que Pertenece el Combatiente",
    combat2 = "Descripción Fuerza o Grupo Armado Organizado al que Pertenece el Combatiente",
    job = "Ocupación"
  ))) %>%
  # Keep civilian victims only.
  filter(victim == "CIVIL") %>%
  # Numeric versions of the event id (join key) and of the municipality code.
  mutate(
    id_event = as.numeric(id_event),
    divipola = as.numeric(divipola_alt)
  )

# Attach the case-level variables to every victim, then keep the victims whose
# event is flagged as a massacre (victims without a matching case have a
# missing flag and are dropped by the filter).
omc_ve_ma <- omc_v_ma %>%
  left_join(omc_cco, by = "id_event") %>%
  filter(massacre == 1)

# To control for selectivity
# A killing is coded as selective (`sv == 1`) when the victim is recorded as a
# political militant or when the modality of the event is one of the seven
# modalities treated as selective; otherwise `sv == 0`.

# Victims without a recorded political militancy get the explicit label "None".
omc_ve_ma <- omc_ve_ma %>%
  mutate(pol_m = replace_na(pol_m, "None"))

omc_ve_ma <- omc_ve_ma %>%
  mutate(
    # 1 when `Modalidad` contains any of the selective modalities; 0 otherwise,
    # including when `Modalidad` is missing.
    modality_sv = if_else(
      str_detect(
        Modalidad,
        str_c(
          c("ASALTO", "ATENTADO", "CITACIÓN", "ENGAÑO",
            "RETENCIÓN/EJECUCIÓN", "RUTA", "SICARIATO"),
          collapse = "|"
        )
      ),
      1, 0,
      missing = 0
    ),
    # Neither input can be missing at this point, so a two-way split is enough.
    sv = if_else(pol_m != "None" | modality_sv == 1, 1, 0),
    # `year.x` is the victim-file year (the join suffixed the duplicated name).
    year = as.numeric(year.x)
  )

# Now, only violence by RG
omc_ve_ma_r <- omc_ve_ma %>%
  filter(rebel_groups == 1)

# Number of victims per municipality-year: all massacre victims, then the
# selective subset. `count()` returns an ungrouped tibble, so no `divipola`
# grouping leaks into the joined table further down.
mciv_r <- omc_ve_ma_r %>%
  count(divipola, year, name = "reb_cmass")

mciv_r_sv <- omc_ve_ma_r %>%
  filter(sv == 1) %>%
  count(divipola, year, name = "reb_cmass_sv")

# Frequency table of the perpetrator labels, printed for inspection only.
omc_ve_ma_r %>%
  group_by(GrupoArmado1_name) %>%
  count()

# Now, only violence by FARC
# Events attributed jointly to FARC and another group, or to the Coordinadora
# Guerrillera Simón Bolívar, count as FARC events too. A missing group name
# yields 0.
omc_ve_ma_r <- omc_ve_ma_r %>%
  mutate(farc = if_else(
    GrupoArmado1_name %in% c(
      "FARC", "FARC/ELN", "FARC/EPL",
      "COORDINADORA GUERRILLERA SIMÓN BOLÍVAR"
    ),
    1, 0
  ))

omc_ve_ma_farc <- omc_ve_ma_r %>%
  filter(farc == 1)

mciv_farc <- omc_ve_ma_farc %>%
  count(divipola, year, name = "farc_cmass")

mciv_farc_sv <- omc_ve_ma_farc %>%
  filter(sv == 1) %>%
  count(divipola, year, name = "farc_cmass_sv")

# Now, only violence by ELN
# Same rule as for FARC: joint events and the Coordinadora count as ELN events.
omc_ve_ma_r <- omc_ve_ma_r %>%
  mutate(eln = if_else(
    GrupoArmado1_name %in% c(
      "ELN", "FARC/ELN", "ELN/EPL", "ELN/ERP",
      "COORDINADORA GUERRILLERA SIMÓN BOLÍVAR"
    ),
    1, 0
  ))

omc_ve_ma_eln <- omc_ve_ma_r %>%
  filter(eln == 1)

mciv_eln <- omc_ve_ma_eln %>%
  count(divipola, year, name = "eln_cmass")

mciv_eln_sv <- omc_ve_ma_eln %>%
  filter(sv == 1) %>%
  count(divipola, year, name = "eln_cmass_sv")

# One row per municipality-year with any rebel massacre victim; the subset
# counts are merged on the explicit keys and a missing count means zero victims.
# NOTE(review): `replace(is.na(.), 0)` covers the whole table, so a missing
# `divipola` or `year` key is also turned into 0 - confirm this is intended.
mciv_rg_t <- mciv_r %>%
  left_join(mciv_r_sv, by = c("divipola", "year")) %>%
  left_join(mciv_farc, by = c("divipola", "year")) %>%
  left_join(mciv_farc_sv, by = c("divipola", "year")) %>%
  left_join(mciv_eln, by = c("divipola", "year")) %>%
  left_join(mciv_eln_sv, by = c("divipola", "year")) %>%
  replace(is.na(.), 0)

write_xlsx(mciv_rg_t, "Ortega(2024)-PreProcessingOMC_VictimsR_MA-Updated(09162024).xlsx")
  # Checked

### Merge the two datasets

# Dependent variable: victims of assassinations plus victims of massacres, per
# municipality-year. The full join keeps municipality-years that appear in only
# one of the two tables; their counts on the other side are set to zero before
# adding. The join keys are spelled out so that the merge does not depend on
# which column names the two tables happen to share.
dv <- kciv_rg_t %>%
  full_join(mciv_rg_t, by = c("divipola", "year")) %>%
  replace(is.na(.), 0) %>%
  mutate(
    # All rebel groups
    reb_cdv = reb_ckill + reb_cmass,
    reb_cdv_sv = reb_ckill_sv + reb_cmass_sv,
    # FARC
    farc_cdv = farc_ckill + farc_cmass,
    farc_cdv_sv = farc_ckill_sv + farc_cmass_sv,
    # ELN
    eln_cdv = eln_ckill + eln_cmass,
    eln_cdv_sv = eln_ckill_sv + eln_cmass_sv
  )

write_xlsx(dv, "Ortega(2024)-DV_FV-Updated(09162024).xlsx")
  # Checked -> M

```

A brief note on the name of the files:

I will keep the original names of the raw datasets used for the analysis. Whenever I have intervened in or modified a dataset (e.g., by creating a dummy out of a numeric variable), I name the resulting file "Ortega(2024)...".

Note on Modalidad of assassination or massacre: Out of the 14 kinds of assassinations/massacres, I argue that 7 are selective, namely: 'asalto', 'atentado', 'citación', 'engaño', 'retención/ejecución', 'ruta', and 'sicariato'.

See the definition of the categories here: https://micrositios.centrodememoriahistorica.gov.co/observatorio/sievcac/definiciones/

**Bibliography**

CNMH. 2021. Bases de datos. Observatorio de Memoria y Conflicto. Fecha de corte 31/01/2021. Accessed at: http://micrositios.centrodememoriahistorica.gov.co/observatorio/portal-de-datos/el-conflicto-en-cifras/#base-de-datos, August 26 2021.

CNMH. n.d. "Definiciones. ¿Cómo pasó? Modalidades Transversales". Observatorio de Memoria y Conflicto. Accessed at: https://micrositios.centrodememoriahistorica.gov.co/observatorio/sievcac/definiciones/, September 16 2024.